#clean data
data_student<-data_student_raw %>%
  group_by(an_adm,Cod_SIRUTA2_hs_adm,judet_adm) %>%
  mutate(n=n()) %>%
  mutate(n_hs=n_distinct(Cod_SIIIR_hs_adm[!is.na(Cod_SIIIR_hs_adm)])) %>%
  mutate(n_hs_town_group=case_when(n_hs==1 ~"1",
                                   n_hs==2 ~"2",
                                   n_hs==3 ~ "3",
                                   n_hs %in% 4:15 ~ "4-15",
                                   n_hs %in% 16:999 ~ "16+")) %>%
  mutate(n_hs_town_group=factor(n_hs_town_group,levels=c("1","2","3","4-15","16+"))) %>%
  mutate(n_s_adm=length(Cod_SIIIR_hs_adm[!is.na(entrance_perc) & !is.na(Cod_SIIIR_hs_adm)])) %>%
  ungroup() %>%
  mutate(n_s_adm_bin=cut(n_s_adm,seq(from=0,to=100000,by=50))) %>%
  mutate(n_s_adm_bin_big=cut(n_s_adm,c(seq(from=0,to=2500,by=100),5000,25000),dig.lab=10)) %>%
  mutate( quartile=case_when(
    entrance_perc>=0.75 ~ 4,
    entrance_perc>=0.5 ~ 3,
    entrance_perc>=0.25 ~ 2,
    entrance_perc>=0 ~ 1,
    TRUE ~ NA_real_
  )) %>%
  mutate() %>%
  dplyr::select(entrance_perc,grad_perc,quartile,n_s_adm_bin,n_s_adm_bin_big,n_s_adm,
                n_hs,n_hs_town_group,an_bac,Cod_SIRUTA2_hs_adm,an_adm,judet_adm)

#regression models
model12<-feols(data=data_student,I((quartile==1)*100)~n_hs_town_group|n_s_adm_bin+judet_adm,
               cluster=~judet_adm)
model22<-feols(data=data_student,I((quartile==2)*100)~n_hs_town_group|n_s_adm_bin+judet_adm,
               cluster=~judet_adm)
model32<-feols(data=data_student,I((quartile==3)*100)~n_hs_town_group|n_s_adm_bin+judet_adm,
               cluster=~judet_adm)
model42<-feols(data=data_student,I((quartile==4)*100)~n_hs_town_group|n_s_adm_bin+judet_adm,
               cluster=~judet_adm)
model_ind<-feols(data=data_student,I(entrance_perc*100)~n_hs_town_group|n_s_adm_bin+judet_adm,
                 cluster=~judet_adm)
summary(model12)
summary(model22)
summary(model32)
summary(model42)
summary(model_ind)



f_big<-function(x) format(x, big.mark=",", scientific=FALSE, nsmall=1,digits=1)
f <- function(x) format(round(x/1000000,1) , nsmall = 1)
glance_custom.fixest <- function(x, ...) {
  dv <- insight::get_response(x)
  dv <- sprintf("%.2f", mean(dv, na.rm = TRUE))
  data.table::data.table(`Mean of DV` = dv)
}


#print table 3
options(modelsummary_format_numeric_latex = "plain")
variables<-c("n_hs_town_group2"="2 HS-Town","n_hs_town_group3"="3 HS-Town","n_hs_town_group4-15"="4-15 HS-Town",
             "n_hs_town_group16+"="16+ HS-Town")


current_path<-rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
fileConn<-file("05_table_03.txt")
writeLines(print(
                 modelsummary(list("mean"=model_ind,
                                   "q1"=model12,
                                   "q2"=model22,
                                   "q3"=model32,
                                   "q4"=model42
                 ),
                 estimate="{estimate}{stars}",
                 statistic = "std.error",
                 stars=c('$^{*}$'=0.1,'$^{**}$'=0.05,'$^{***}$'=0.01),
                 gof_map=list(list("raw" = "nobs", "clean" = "N", "fmt" = f_big),
                              list("raw" = "r.squared", "clean" = "R$^2$",fmt="%.2f"),
                              list("raw"="Mean of DV","clean"="DV Mean",fmt="%.2f")),
                 metrics="R2",
                 fmt=2,
                 output="latex",
                 coef_rename=variables,
                 escape=F)
), fileConn)
close(fileConn)


#print figure for appendix
current_path<-rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
pdf("iv_overlap_appendix.pdf" , width = 7 , height = 5) 
ggplot(data_student %>%   filter(!is.na(n_s_adm_bin_big)),aes(x=n_s_adm_bin_big,y=n_hs))+
  geom_boxplot()+
  # geom_point(size=1)+
  theme(strip.text.x = element_text(size=18),
        axis.text.x=element_text(size=14),
        axis.text.y = element_text(size=14),  
        axis.title.x = element_text(size=14),
        axis.title.y = element_text(size=14),
        legend.title=element_text(size=18),
        legend.text=element_text(size=18))+
  #ylim(-0.1,0.3)+
  xlab("Number of Admitted Students")+
  ylab("Number of High Schools")+
  #scale_x_continuous(breaks=0:10)+
  scale_y_continuous(breaks=seq(from = -30, to = 200, by = 10))+ 
  theme(legend.position = "none")+
  coord_flip()
dev.off()

